# Figure_1.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))  # for qw()
library(dplyr)         # for %>%
library(grid)
library(gtable)        # for gtable_matrix(), used for strip text
library(Hmisc)         # for Dotplot()
library(lattice) 
library(lmtest)        # for coeftest()
library(multiwayvcov)  # for cluster.vcov, which helps to produce clustered SEs

dirOutput <- 'float_output/'  
source('IV_setup.R')
source('functions/estimateModels.R')
source('float_code/dotplotParameters.R')

filenameStem <- 'Figure_1'
PDFtitle     <- 'Figure 1: Effects of schooling laws and educational attainment'
IVModelsEnv <- importModels(
  filename      = 'IV_models.R', 
  endogVarNames = 'educ',
  checkModels   = TRUE)
varNames    <- qw('eqwlth goveqinc guarantee.7pt govt.health.7pt helppoor welfare')
dfNames     <- qw('GSS.df GSS.df   ANES.df       ANES.df         GSS.df   GSS.df') 
panelLabels <- list(
  'change in\nattitudes',
  'change in\nyears of schooling',
  expression(paste(italic(F), ~statistics)),
  'change in\nattitudes')
stripText <- list(
  list(paste0("Reduced\U00AD", "form"), "estimates"),
  list("First\U00ADstage", "estimates"),
  list("First\U00ADstage", expression(paste(bolditalic(F), bold(~statistics)))),
  list("IV", "estimates"))

# put labels for the reduced-form panel on the right
panelLabels <- panelLabels[c(2, 3, 1, 4)]
stripText   <- stripText  [c(2, 3, 1, 4)] 




##############################################################################
# PRELIMINARIES FOR DRAWING THE FIGURE
##############################################################################
panelHeight      <- list(2.500, "inches")
panelWidth       <- list(1.225, "inches")  
stripHeight      <- list(lines = 2.25, lineheight = 14) # lineheight is space between lines
panelBorderWidth <- .3
panelLayout      <- c(length(panelLabels), 1)  # columns, then rows
nPanels          <- panelLayout[1] * panelLayout[2] 
xBetween         <- 1.1000   # space between columns
yBetween         <- 2.9000   # space between rows
baseCexSize      <- 1.00     # cex
stripTextSize    <-  .700  * baseCexSize 
xLabSize         <- 0.855  * baseCexSize 
yAxisTextSize    <- 0.710  * baseCexSize    
xAxisTextSize    <-  .6575 * baseCexSize 
axisTickSize     <-  .425
xListLimits <- list(
  c(-.11, .11),   # reduced-form estimates
  c( 0, 1.3),     # first-stage estimates
  c(0, 106),      # first-stage F (bottom)
  c(-.11,  .11))  # 2SLS estimates  
xListLabels <- list(
  c('-.09', '-.03', '.03', '.09'),
  c('.2', '.5', '.8', '1.1'), 
  seq(15, 90, length = 4),
  c('-.09', '-.03', '.03', '.09'))
xListLimits <- xListLimits[c(2, 3, 1, 4)]
xListLabels <- xListLabels[c(2, 3, 1, 4)]  
xListAt <- lapply(xListLabels, as.numeric)
xList <- list(
  draw        = TRUE,   
  limits      = xListLimits,
  labels      = xListLabels,  
  at          = xListAt,
  tck         = c(axisTickSize, 0), 
  col         = panelBorderCol, 
  cex         = xAxisTextSize,
  alternating = 1, 
  relation    = 'free', 
  axs         = 'i')  # axs='i' means that there is no padding around the xlimits
yList <- list(
  draw        = TRUE,
  limits      = c(.35, 6.7),
  labels      = c(    
    'redist. to poor (1)', 
    'redist. to poor (2)',
    'standard of living',
    'health care',
    'help the poor',
    'welfare'),
  at          = 6:1,
  tck         = 0,
  col         = panelBorderCol,
  cex         = yAxisTextSize,
  alternating = c(1, 1),
  relation    = 'same',  
  axs         = 'i')



##############################################################################
# CREATE MODEL ENVIRONMENTS 
##############################################################################
# Remove all models except the baseline model.
with(IVModelsEnv, rm(list = ls(pattern = 'mod[2-9]')))


# CREATE REDUCED-FORM ENVIRONMENT
RF_models <- eapply(IVModelsEnv, getReducedForm)  
RF_models <- addModNumAttribute(RF_models) 
RF_models <- list2env(RF_models)


# CREATE FIRST-STAGE ENVIRONMENTS
IVModelsEnvFirstStage   <- eapply(IVModelsEnv, getFirstStage)
IVModelsEnvFirstStage   <- addModNumAttribute(IVModelsEnvFirstStage)
IVModelsEnvFirstStage   <- list2env(IVModelsEnvFirstStage)



##############################################################################
# CREATE DATA FRAMES
##############################################################################
GSS.df$educ1      <- GSS.df$educ2 <- GSS.df$educ5 <- GSS.df$educ6 <- GSS.df$educ
ANES.df$educ3     <- ANES.df$educ4 <- ANES.df$educ
GSS.df.eqwlth     <- GSS.df [!is.na(GSS.df$eqwlth)           & !is.na(GSS.df$educ), ]
GSS.df.goveqinc   <- GSS.df [!is.na(GSS.df$goveqinc)         & !is.na(GSS.df$educ), ]
ANES.df.guarantee <- ANES.df[!is.na(ANES.df$guarantee.7pt)   & !is.na(ANES.df$educ), ]
ANES.df.health    <- ANES.df[!is.na(ANES.df$govt.health.7pt) & !is.na(ANES.df$educ), ]
GSS.df.helppoor   <- GSS.df [!is.na(GSS.df$helppoor)         & !is.na(GSS.df$educ), ]
GSS.df.welfare    <- GSS.df [!is.na(GSS.df$welfare)          & !is.na(GSS.df$educ), ]
dfNames  <- qw(
  "GSS.df.eqwlth  GSS.df.goveqinc ANES.df.guarantee
   ANES.df.health GSS.df.helppoor GSS.df.welfare") 



##############################################################################
# ESTIMATE MODELS
##############################################################################

# ESTIMATE 2SLS MODELS
# We also produce the "mainEstsEduc" table here because doing so will make it 
# easier to extract the clustered SEs later on.
IVModelObjectsEnv <- estimateModels(
  varNames     = varNames, 
  modelEnvir   = IVModelsEnv, 
  dfNames      = dfNames,
  objectSuffix = '.IV') 
mainEstsEduc <- estTable(
  varNames    = varNames, 
  treatment   = 'educ', 
  objectEnvir = IVModelObjectsEnv,
  clusterSEs  = TRUE)


# ESTIMATE REDUCED-FORM MODELS
RF_modelObjectsEnv <- estimateModels(
  varNames     = varNames, 
  modelEnvir   = RF_models,  
  dfNames      = dfNames,
  estFun       = lm,
  objectSuffix = '.lm') 

# ESTIMATE FIRST-STAGE MODELS
varNamesFirstStage <- paste0('educ',   1:6)
firstStageModelObjectsEnv <- estimateModels(
  varNames     = varNamesFirstStage, 
  modelEnvir   = IVModelsEnvFirstStage,
  dfNames      = dfNames,
  estFun       = lm,
  objectSuffix = '.lm') 

# GET FIRST-STAGE F STATISTICS
Fstats <- eapply(
    env = IVModelObjectsEnv, 
    FUN = function (x) summary(x, diagnostics = TRUE)$diagnostics['Weak instruments', 'statistic']) %>%
  .[sort(names(.))]



##############################################################################
# CREATE DATA FRAME WITH DATA TO PLOT
##############################################################################
instrumentNames <- qw("CA.fac(7,10] CA.fac(10,100]")
treatmentName <- 'educ'
dataToPlot <- expand.grid(
  lower      = NA, 
  pred       = NA, 
  upper      = NA,
  instrument = instrumentNames,
  estimand   = c('F', 'firstStage', 'reducedForm', '2SLS'),
  depVar     = varNames)
dataToPlot <- dataToPlot[!(dataToPlot$estimand == 'F' & instrumentNames != instrumentNames[1]),]
dataToPlot$instrument[dataToPlot$estimand == 'F'] <- NA
dataToPlot <- dataToPlot[!(dataToPlot$estimand == '2SLS' & instrumentNames != instrumentNames[1]),]
dataToPlot$instrument[dataToPlot$estimand == '2SLS'] <- NA

for (varName in varNames) {
  varNameIndex <- which(varNames == varName)    
  tmpRF <- get(paste0(varName, '.lm1'), RF_modelObjectsEnv)
  tmpFS <- get(
    x     = paste0(treatmentName, varNameIndex, '.lm1'),  # e.g., "educ6.lm1" 
    envir = firstStageModelObjectsEnv)  

  # Record the first-stage F statistics.  
  dataToPlot[dataToPlot$estimand == 'F' & dataToPlot$depVar == varName, 'pred'] <- Fstats[paste0(varName, '.IV1')] 
    
  # Record the 2SLS estimates.  
  coef_2SLS   <- mainEstsEduc[1, varName]
  coefSE_2SLS <- mainEstsEduc[1, paste0(varName, '.se')]
  dataToPlot[dataToPlot$estimand == '2SLS' & dataToPlot$depVar == varName, qw("lower pred upper")] <- c(
    coef_2SLS - 1.96*coefSE_2SLS,
    coef_2SLS,
    coef_2SLS + 1.96*coefSE_2SLS)
  

  
  for (instrumentName in instrumentNames) {

    # Create the reduced-form cluster variable.  
    tmpRF_clusters <- tmpRF$model$stateYoung:factor(tmpRF$model$yearYoung)
    
    # Get the reduced-form clustered variance-covariance matrix.  
    tmpRF_clusterVcov <- cluster.vcov(tmpRF, tmpRF_clusters)
    
    # Record reduced-form coefficient and SE.  
    coefEstRF <- coeftest(tmpRF)[instrumentName, 'Estimate']
    coefSE_RF <- coeftest(tmpRF, vcov. = tmpRF_clusterVcov)[instrumentName, 'Std. Error']
    dataToPlot[dataToPlot$estimand == 'reducedForm' & dataToPlot$instrument == instrumentName & dataToPlot$depVar == varName, qw("lower pred upper")] <- c(
      coefEstRF - 1.96*coefSE_RF,
      coefEstRF,
      coefEstRF + 1.96*coefSE_RF)
  

    # Create the first-stage cluster variable.
    tmpFS_clusters <- tmpFS$model$stateYoung:factor(tmpFS$model$yearYoung)
    
    # Get the first-stage clustered variance-covariance matrix.  
    tmpFS_clusterVcov <- cluster.vcov(tmpFS, tmpFS_clusters)    
    
    # Record first-stage coefficient and SE.  
    coefEstFS <- coeftest(tmpFS)[instrumentName, 'Estimate']
    coefSE_FS <- coeftest(tmpFS, vcov. = tmpFS_clusterVcov)[instrumentName, 'Std. Error']
    dataToPlot[dataToPlot$estimand == 'firstStage' & dataToPlot$instrument == instrumentName & dataToPlot$depVar == varName, qw("lower pred upper")] <- c(
      coefEstFS - 1.96*coefSE_FS,
      coefEstFS,
      coefEstFS + 1.96*coefSE_FS)
  }
}  

# Order the panels 
dataToPlot$panelNum[dataToPlot$estimand == 'firstStage']  <- 1
dataToPlot$panelNum[dataToPlot$estimand == 'F']           <- 2
dataToPlot$panelNum[dataToPlot$estimand == 'reducedForm'] <- 3
dataToPlot$panelNum[dataToPlot$estimand == '2SLS']        <- 4  

  
# Assign row numbers (i.e., y coordinates) within each panel.  
rowNumAdjustmentForPairs <- .15
dataToPlot$rowNum        <- 7 - unclass(dataToPlot$depVar)  # Assign y-coordinates 1, 2, 3, 4, and 5
dataToPlot$rowNum[dataToPlot$instrument %in% instrumentNames[1]] <- dataToPlot$rowNum[dataToPlot$instrument %in% instrumentNames[1]] + rowNumAdjustmentForPairs  
dataToPlot$rowNum[dataToPlot$instrument %in% instrumentNames[2]] <- dataToPlot$rowNum[dataToPlot$instrument %in% instrumentNames[2]] - rowNumAdjustmentForPairs 

# Add an instrument name for the F-statistics rows. It makes plotting easier,
# because I use dataToPlot$instrument as the "groups" argument when I call
# Dotplot().  
levels(dataToPlot$instrument) <- c(levels(dataToPlot$instrument), 'noInstrument') 
dataToPlot$instrument[dataToPlot$estimand == qw('F 2SLS')] <- 'noInstrument'


# CRUDE FIX TO CLIP DATA DISPLAY TO PANEL LIMITS
# In the last panel, which reports baseline IV estimates, the CI for 
# education's effects on health-care attitudes extends beyond the right-hand 
# edge of the panel. To avoid dealing with the typical R/lattice clipping 
# problems, I just change the information about the CI in the dataToPlot
# data frame.  
index_upperCI_tooLarge <- which(dataToPlot$upper>xList$limits[[4]][2] & dataToPlot$estimand == '2SLS')
dataToPlot$upper[index_upperCI_tooLarge] <- xList$limits[[4]][2]



##############################################################################
# GET AUXILIARY INFORMATION
##############################################################################

if (interactive()) {

  # 2SLS ESTIMATES IN TERMS OF SD OF OUTCOMES
  tmpFun <- function (varName) { 
    index <- which(varNames %in% varName)
    tmp <- get(dfNames[index]) %>%
      as.data.frame
    sd(tmp[, varName])
  }
  SD_outcomes <- sapply(varNames, tmpFun)
  dataToPlot$pred[dataToPlot$estimand == '2SLS'] / SD_outcomes
  
  
  # SAMPLE SIZES
  firstStageN <- eapply(firstStageModelObjectsEnv, nobs)
  firstStageN[sort(names(firstStageN))]
  reducedFormN <- eapply(RF_modelObjectsEnv, nobs)
  reducedFormN[sort(names(reducedFormN))]
}



##############################################################################
# STRIP FUNCTION
##############################################################################
# It is difficult to have a multi-line text label in R when (a) the line 
# contains a plotmath expression like "italic(F)" and (b) you want to 
# fine-tune the spacing.  This function does the job.  
grid.expr <- function(labels, ..., width = NULL, heights = NULL, margin = unit(0.5,"line")) {  
  gl <- lapply(labels, textGrob, ...)  
  if (is.null(heights))
    heights <- do.call(unit.c, lapply(gl, grobHeight)) + margin  
  widths <- do.call(max, lapply(gl, grobWidth))
  gt <- gtable_matrix(
    name    = "table", 
    grobs   = matrix(gl, ncol = 1), 
    widths  = widths, 
    heights = heights)
  grid.draw(gt)
}

stripHorizontal <- function(...) {
  lpolygon(
    x        = c(0,1,1,0,0), 
    y        = c(0,0,1,1,0), 
    col      = stripBackgroundCol, 
    border   = stripBorderCol,
    linejoin = 'mitre',  # for square corners
    lwd      = panelBorderWidth)
  
  # ADD TEXT LABELS
  grid.expr(
    labels  = stripText[[panel.number()]],
    # heights = rep(unit(2, "line"), 2),
    margin = unit(.375, "line"),
    gp = gpar(font = 2, cex = stripTextSize)
  )    
}  



##############################################################################
# PANEL FUNCTION
##############################################################################
myPanel <- function(...) {
  if (panel.number() >= 3) {
    panel.abline(v = 0, col = dotplotLineColor)
  }
  panel.Dotplot(...)
   
  # Put tick marks at top of each panel
  panel.axis(
    side        = "top",
    at          = xList$at[[panel.number()]],
    draw.labels = FALSE,
    half        = FALSE,
    tck         = axisTickSize * .75)
  
  # LABELS BELOW EACH PANEL
  # If the labels are one line each, use panelLabelY == -.1675.
  trellis.par.set("clip", list(panel = "off", strip = "off")) 
  panelLabelY <- -.19
  grid.text(
    label = panelLabels[[panel.number()]], 
    x     = .5, 
    y     = panelLabelY, 
    gp    = gpar(
      cex        = xLabSize * .95,
      lineheight = baseCexSize)) 
}



##############################################################################
# MAKE THE PDF FILE
##############################################################################
pdf(
  file   = paste0(dirOutput, filenameStem, '.pdf'), 
  width  = PS_width, 
  height = PS_height, 
  paper  = "special", 
  title  = PDFtitle,
  bg     = postscriptBackground)
trellis.par.set(
  axis.components = list(left = list(pad1 = .5)),   # distance between axis ticks and tick labels
  axis.line = list( 
    alpha = 1, 
    col   = panelBorderCol,                         # to eliminate panel border, set col to "white"
    lty   = 1, 
    lwd   = panelBorderWidth),  
  clip = list(panel = "off", strip = "off"),
  dot.line = list(
    alpha = 1, 
    col   = dotplotLineColor, 
    lty   = 1, 
    lwd   = 1),
  dot.symbol = list(                                # no effect when "groups" is specified in the lattice command below
    alpha = 1, 
    cex   = .5, 
    col   = dotColor, 
    font  = 1, 
    pch   = c('L', 'S')),                           # pch = 16 is a dot           
  plot.line = list(
    alpha = 1,                                      # confidence interval in dotplots
    col   = dotColor, 
    lty   = 1, 
    lwd   = 1),
  superpose.line = list(                            # plotted line in xyplot, or confidence interval in dotplot when the "groups" argument is used in lattice
    alpha = 1, 
    col   = plotLineColor,                          # lower "gray" values are darker -- gray32 may be the best compromise across printers
    lty   = 'solid', 
    lwd   = 1.00))                             

reducedFormThroughFirstStageF_dotplot <- Dotplot(
  rowNum ~ Cbind(pred, lower, upper) | panelNum, 
  groups         = instrument,
  data           = dataToPlot,
  panel          = myPanel,
  layout         = panelLayout,
  as.table       = TRUE,
  between        = list(x = xBetween, y = yBetween),
  ###
  pch            = list('M', 'S', 16),  # for "moderate" and "strict" instruments; 16 is a dot
  font           = 2,
  cex            = c(.8 * baseCexSize, .8 * baseCexSize, .7 * baseCexSize),
  col            = dotColor,
  ###
  strip          = stripHorizontal,
  par.strip.text = stripHeight,
  scales         = list(x = xList, y = yList),
  xlab           = '',
  ylab           = '',
)
print(
  reducedFormThroughFirstStageF_dotplot, 
  panel.width  = panelWidth, 
  panel.height = panelHeight)


# SAVE AND PROCESS FILE
dev.off()
if (!(Sys.which('pdfcrop')=='' | Sys.which('perl')=='')  |  'pdfcrop' %in% installed.packages()[, 'Package']) {  # if "pdfcrop" is installed 
  system(paste(
    if (Sys.info()['sysname']=='Windows') paste(Sys.getenv('Comspec'), '/c ') else "",
    'pdfcrop', 
    paste0(dirOutput, filenameStem, '.pdf'), 
    paste0(dirOutput, filenameStem, '_crop.pdf')))
  file.remove(paste0(dirOutput, filenameStem, '.pdf'))
  if (interactive() & Sys.info()['sysname']=='Windows') shell.exec(paste0(normalizePath(dirOutput), filenameStem, '_crop.pdf'))
}

 
